import {
  BoundingBox,
  PdfJsonDocument,
  PdfJsonPage,
  PdfJsonTextElement,
  PdfJsonImageElement,
  TextGroup,
  DEFAULT_PAGE_HEIGHT,
  DEFAULT_PAGE_WIDTH,
} from '@app/tools/pdfTextEditor/pdfTextEditorTypes';

const LINE_TOLERANCE = 2;
const GAP_FACTOR = 0.6;
const SPACE_MIN_GAP = 1.5;
const MIN_CHAR_WIDTH_FACTOR = 0.35;
const MAX_CHAR_WIDTH_FACTOR = 1.25;
const EXTRA_GAP_RATIO = 0.8;

type FontMetrics = {
  unitsPerEm: number;
  ascent: number;
  descent: number;
};

type FontMetricsMap = Map<string, FontMetrics>;

const sanitizeParagraphText = (text: string | undefined | null): string => {
  if (!text) {
    return '';
  }
  return text.replace(/\r?\n/g, '');
};

const splitParagraphIntoLines = (text: string | undefined | null): string[] => {
  if (text === null || text === undefined) {
    return [''];
  }
  return text.replace(/\r/g, '').split('\n');
};

const extractElementBaseline = (element: PdfJsonTextElement): number | null => {
  if (!element) {
    return null;
  }
  if (element.textMatrix && element.textMatrix.length >= 6) {
    const baseline = element.textMatrix[5];
    return typeof baseline === 'number' ? baseline : null;
  }
  if (typeof element.y === 'number') {
    return element.y;
  }
  return null;
};

const shiftElementsBy = (elements: PdfJsonTextElement[], delta: number): PdfJsonTextElement[] => {
  if (delta === 0) {
    return elements.map(cloneTextElement);
  }
  return elements.map((element) => {
    const clone = cloneTextElement(element);
    if (clone.textMatrix && clone.textMatrix.length >= 6) {
      const matrix = [...clone.textMatrix];
      matrix[5] = (matrix[5] ?? 0) + delta;
      clone.textMatrix = matrix;
    }
    if (typeof clone.y === 'number') {
      clone.y += delta;
    } else if (clone.y === null || clone.y === undefined) {
      clone.y = delta;
    }
    return clone;
  });
};

const countGraphemes = (text: string): number => {
  if (!text) {
    return 0;
  }
  return Array.from(text).length;
};

const metricsFor = (metrics: FontMetricsMap | undefined, fontId?: string | null): FontMetrics | undefined => {
  if (!metrics || !fontId) {
    return undefined;
  }
  return metrics.get(fontId) ?? undefined;
};

const buildFontMetrics = (document: PdfJsonDocument | null | undefined): FontMetricsMap => {
  const metrics: FontMetricsMap = new Map();
  document?.fonts?.forEach((font) => {
    if (!font) {
      return;
    }
    const unitsPerEm = font.unitsPerEm && font.unitsPerEm > 0 ? font.unitsPerEm : 1000;
    const ascent = font.ascent ?? unitsPerEm * 0.8;
    const descent = font.descent ?? -(unitsPerEm * 0.2);
    const metric: FontMetrics = { unitsPerEm, ascent, descent };
    if (font.id) {
      metrics.set(font.id, metric);
    }
    if (font.uid) {
      metrics.set(font.uid, metric);
    }
  });
  return metrics;
};

export const valueOr = (value: number | null | undefined, fallback = 0): number => {
  if (value === null || value === undefined || Number.isNaN(value)) {
    return fallback;
  }
  return value;
};

export const cloneTextElement = (element: PdfJsonTextElement): PdfJsonTextElement => ({
  ...element,
  textMatrix: element.textMatrix ? [...element.textMatrix] : element.textMatrix ?? undefined,
});

const clearGlyphHints = (element: PdfJsonTextElement): void => {
  if (!element) {
    return;
  }
  element.charCodes = undefined;
};

export const cloneImageElement = (element: PdfJsonImageElement): PdfJsonImageElement => ({
  ...element,
  transform: element.transform ? [...element.transform] : element.transform ?? undefined,
});

const getBaseline = (element: PdfJsonTextElement): number => {
  if (element.textMatrix && element.textMatrix.length === 6) {
    return valueOr(element.textMatrix[5]);
  }
  return valueOr(element.y);
};

const getX = (element: PdfJsonTextElement): number => {
  if (element.textMatrix && element.textMatrix.length === 6) {
    return valueOr(element.textMatrix[4]);
  }
  return valueOr(element.x);
};

const getWidth = (element: PdfJsonTextElement, metrics?: FontMetricsMap): number => {
  const width = valueOr(element.width, 0);
  if (width > 0) {
    return width;
  }

  const text = element.text ?? '';
  const glyphCount = Math.max(1, countGraphemes(text));
  const spacingFallback = Math.max(
    valueOr(element.spaceWidth, 0),
    valueOr(element.wordSpacing, 0),
    valueOr(element.characterSpacing, 0),
  );

  if (spacingFallback > 0 && text.trim().length === 0) {
    return spacingFallback;
  }

  const fontSize = getFontSize(element);
  const fontMetrics = metricsFor(metrics, element.fontId);
  if (fontMetrics) {
    const unitsPerEm = fontMetrics.unitsPerEm > 0 ? fontMetrics.unitsPerEm : 1000;
    const ascentUnits = fontMetrics.ascent ?? unitsPerEm * 0.8;
    const descentUnits = Math.abs(fontMetrics.descent ?? -(unitsPerEm * 0.2));
    const combinedUnits = Math.max(unitsPerEm * 0.8, ascentUnits + descentUnits);
    const averageAdvanceUnits = Math.max(unitsPerEm * 0.5, combinedUnits / Math.max(1, glyphCount));
    const fallbackWidth = (averageAdvanceUnits / unitsPerEm) * glyphCount * fontSize;
    if (fallbackWidth > 0) {
      return fallbackWidth;
    }
  }

  return fontSize * glyphCount * 0.5;
};

const getFontSize = (element: PdfJsonTextElement): number => valueOr(element.fontMatrixSize ?? element.fontSize, 12);

const getHeight = (element: PdfJsonTextElement, metrics?: FontMetricsMap): number => {
  const height = valueOr(element.height, 0);
  if (height > 0) {
    return height;
  }
  const fontSize = getFontSize(element);
  const fontMetrics = metricsFor(metrics, element.fontId);
  if (fontMetrics) {
    const unitsPerEm = fontMetrics.unitsPerEm > 0 ? fontMetrics.unitsPerEm : 1000;
    const ascentUnits = fontMetrics.ascent ?? unitsPerEm * 0.8;
    const descentUnits = Math.abs(fontMetrics.descent ?? -(unitsPerEm * 0.2));
    const totalUnits = Math.max(unitsPerEm, ascentUnits + descentUnits);
    if (totalUnits > 0) {
      return (totalUnits / unitsPerEm) * fontSize;
    }
  }
  return fontSize;
};

const getElementBounds = (
  element: PdfJsonTextElement,
  metrics?: FontMetricsMap,
): BoundingBox => {
  const left = getX(element);
  const width = getWidth(element, metrics);
  const baseline = getBaseline(element);
  const height = getHeight(element, metrics);

  let ascentRatio = 0.8;
  let descentRatio = 0.2;
  const fontMetrics = metricsFor(metrics, element.fontId);
  if (fontMetrics) {
    const unitsPerEm = fontMetrics.unitsPerEm > 0 ? fontMetrics.unitsPerEm : 1000;
    const ascentUnits = fontMetrics.ascent ?? unitsPerEm * 0.8;
    const descentUnits = Math.abs(fontMetrics.descent ?? -(unitsPerEm * 0.2));
    const totalUnits = Math.max(unitsPerEm, ascentUnits + descentUnits);
    if (totalUnits > 0) {
      ascentRatio = ascentUnits / totalUnits;
      descentRatio = descentUnits / totalUnits;
    }
  }

  const bottom = baseline + height * ascentRatio;
  const top = baseline - height * descentRatio;
  return {
    left,
    right: left + width,
    top,
    bottom,
  };
};

export const getImageBounds = (element: PdfJsonImageElement): BoundingBox => {
  const left = valueOr(element.left ?? element.x, 0);
  const computedWidth = valueOr(element.width, Math.max(valueOr(element.right, left) - left, 0));
  const right = valueOr(element.right ?? left + computedWidth, left + computedWidth);
  const bottom = valueOr(element.bottom ?? element.y, 0);
  const computedHeight = valueOr(element.height, Math.max(valueOr(element.top, bottom) - bottom, 0));
  const top = valueOr(element.top ?? bottom + computedHeight, bottom + computedHeight);
  return {
    left,
    right,
    bottom,
    top,
  };
};

const getSpacingHint = (element: PdfJsonTextElement): number => {
  const spaceWidth = valueOr(element.spaceWidth, 0);
  if (spaceWidth > 0) {
    return spaceWidth;
  }
  const wordSpacing = valueOr(element.wordSpacing, 0);
  if (wordSpacing > 0) {
    return wordSpacing;
  }
  const characterSpacing = valueOr(element.characterSpacing, 0);
  return Math.max(characterSpacing, 0);
};

const estimateCharWidth = (
  element: PdfJsonTextElement,
  avgFontSize: number,
  metrics?: FontMetricsMap,
): number => {
  const rawWidth = getWidth(element, metrics);
  const minWidth = avgFontSize * MIN_CHAR_WIDTH_FACTOR;
  const maxWidth = avgFontSize * MAX_CHAR_WIDTH_FACTOR;
  return Math.min(Math.max(rawWidth, minWidth), maxWidth);
};

const mergeBounds = (bounds: BoundingBox[]): BoundingBox => {
  if (bounds.length === 0) {
    return { left: 0, right: 0, top: 0, bottom: 0 };
  }
  return bounds.reduce(
    (acc, current) => ({
      left: Math.min(acc.left, current.left),
      right: Math.max(acc.right, current.right),
      top: Math.min(acc.top, current.top),
      bottom: Math.max(acc.bottom, current.bottom),
    }),
    { ...bounds[0] }
  );
};

const shouldInsertSpace = (
  prev: PdfJsonTextElement,
  current: PdfJsonTextElement,
  metrics?: FontMetricsMap,
): boolean => {
  const prevRight = getX(prev) + getWidth(prev, metrics);
  const trailingGap = Math.max(0, getX(current) - prevRight);
  const avgFontSize = (getFontSize(prev) + getFontSize(current)) / 2;
  const baselineAdvance = Math.max(0, getX(current) - getX(prev));
  const charWidthEstimate = estimateCharWidth(prev, avgFontSize, metrics);
  const inferredGap = Math.max(0, baselineAdvance - charWidthEstimate);
  const spacingHint = Math.max(
    SPACE_MIN_GAP,
    getSpacingHint(prev),
    getSpacingHint(current),
    avgFontSize * GAP_FACTOR,
  );

  if (trailingGap > spacingHint) {
    return true;
  }

  if (inferredGap > spacingHint * EXTRA_GAP_RATIO) {
    return true;
  }

  const prevText = (prev.text ?? '').trimEnd();
  if (prevText.endsWith('-')) {
    return false;
  }

  return false;
};

const buildGroupText = (elements: PdfJsonTextElement[], metrics?: FontMetricsMap): string => {
  let result = '';
  elements.forEach((element, index) => {
    const value = element.text ?? '';
    if (index === 0) {
      result += value;
      return;
    }

    const previous = elements[index - 1];
    const needsSpace = shouldInsertSpace(previous, element, metrics);
    const startsWithWhitespace = /^\s/u.test(value);

    if (needsSpace && !startsWithWhitespace) {
      result += ' ';
    }
    result += value;
  });
  return result;
};

const rgbToCss = (components: number[]): string => {
  if (components.length >= 3) {
    const r = Math.round(Math.max(0, Math.min(1, components[0])) * 255);
    const g = Math.round(Math.max(0, Math.min(1, components[1])) * 255);
    const b = Math.round(Math.max(0, Math.min(1, components[2])) * 255);
    return `rgb(${r}, ${g}, ${b})`;
  }
  return 'rgb(0, 0, 0)';
};

const cmykToCss = (components: number[]): string => {
  if (components.length >= 4) {
    const c = Math.max(0, Math.min(1, components[0]));
    const m = Math.max(0, Math.min(1, components[1]));
    const y = Math.max(0, Math.min(1, components[2]));
    const k = Math.max(0, Math.min(1, components[3]));
    const r = Math.round(255 * (1 - c) * (1 - k));
    const g = Math.round(255 * (1 - m) * (1 - k));
    const b = Math.round(255 * (1 - y) * (1 - k));
    return `rgb(${r}, ${g}, ${b})`;
  }
  return 'rgb(0, 0, 0)';
};

const grayToCss = (components: number[]): string => {
  if (components.length >= 1) {
    const gray = Math.round(Math.max(0, Math.min(1, components[0])) * 255);
    return `rgb(${gray}, ${gray}, ${gray})`;
  }
  return 'rgb(0, 0, 0)';
};

const extractColor = (element: PdfJsonTextElement): string | null => {
  const fillColor = element.fillColor;
  if (!fillColor || !fillColor.components || fillColor.components.length === 0) {
    return null;
  }

  const colorSpace = (fillColor.colorSpace ?? '').toLowerCase();

  if (colorSpace.includes('rgb') || colorSpace.includes('srgb')) {
    return rgbToCss(fillColor.components);
  }
  if (colorSpace.includes('cmyk')) {
    return cmykToCss(fillColor.components);
  }
  if (colorSpace.includes('gray') || colorSpace.includes('grey')) {
    return grayToCss(fillColor.components);
  }

  // Default to RGB interpretation
  if (fillColor.components.length >= 3) {
    return rgbToCss(fillColor.components);
  }
  if (fillColor.components.length === 1) {
    return grayToCss(fillColor.components);
  }

  return null;
};

const RAD_TO_DEG = 180 / Math.PI;

const normalizeAngle = (angle: number): number => {
  let normalized = angle % 360;
  if (normalized > 180) {
    normalized -= 360;
  } else if (normalized <= -180) {
    normalized += 360;
  }
  return normalized;
};

const extractElementRotation = (element: PdfJsonTextElement): number | null => {
  const matrix = element.textMatrix;
  if (!matrix || matrix.length !== 6) {
    return null;
  }
  const a = matrix[0];
  const b = matrix[1];
  if (Math.abs(a) < 1e-6 && Math.abs(b) < 1e-6) {
    return null;
  }
  const angle = Math.atan2(b, a) * RAD_TO_DEG;
  if (Math.abs(angle) < 0.5) {
    return null;
  }
  return normalizeAngle(angle);
};

const computeGroupRotation = (elements: PdfJsonTextElement[]): number | null => {
  const angles = elements
    .map(extractElementRotation)
    .filter((angle): angle is number => angle !== null);
  if (angles.length === 0) {
    return null;
  }
  const vector = angles.reduce(
    (acc, angle) => {
      const radians = (angle * Math.PI) / 180;
      acc.x += Math.cos(radians);
      acc.y += Math.sin(radians);
      return acc;
    },
    { x: 0, y: 0 },
  );
  if (Math.abs(vector.x) < 1e-6 && Math.abs(vector.y) < 1e-6) {
    return null;
  }
  const average = Math.atan2(vector.y, vector.x) * RAD_TO_DEG;
  const normalized = normalizeAngle(average);
  return Math.abs(normalized) < 0.5 ? null : normalized;
};

const getAnchorPoint = (element: PdfJsonTextElement): { x: number; y: number } => {
  if (element.textMatrix && element.textMatrix.length === 6) {
    return {
      x: valueOr(element.textMatrix[4]),
      y: valueOr(element.textMatrix[5]),
    };
  }
  return {
    x: valueOr(element.x),
    y: valueOr(element.y),
  };
};

const computeBaselineLength = (
  elements: PdfJsonTextElement[],
  metrics?: FontMetricsMap,
): number => elements.reduce((acc, current) => acc + getWidth(current, metrics), 0);

const computeAverageBaseline = (elements: PdfJsonTextElement[]): number | null => {
  if (elements.length === 0) {
    return null;
  }
  let sum = 0;
  elements.forEach((element) => {
    sum += getBaseline(element);
  });
  return sum / elements.length;
};

const createGroup = (
  pageIndex: number,
  idSuffix: number,
  elements: PdfJsonTextElement[],
  metrics?: FontMetricsMap,
): TextGroup => {
  const clones = elements.map(cloneTextElement);
  const originalClones = clones.map(cloneTextElement);
  const bounds = mergeBounds(elements.map((element) => getElementBounds(element, metrics)));
  const firstElement = elements[0];
  const rotation = computeGroupRotation(elements);
  const anchor = rotation !== null ? getAnchorPoint(firstElement) : null;
  const baselineLength = computeBaselineLength(elements, metrics);
  const baseline = computeAverageBaseline(elements);

  return {
    id: `${pageIndex}-${idSuffix}`,
    pageIndex,
    fontId: firstElement?.fontId,
    fontSize: firstElement?.fontSize,
    fontMatrixSize: firstElement?.fontMatrixSize,
    color: firstElement ? extractColor(firstElement) : null,
    fontWeight: null, // Will be determined from font descriptor
    rotation,
    anchor,
    baselineLength,
    baseline,
    elements: clones,
    originalElements: originalClones,
    text: buildGroupText(elements, metrics),
    originalText: buildGroupText(elements, metrics),
    bounds,
  };
};

const cloneLineTemplate = (line: TextGroup): TextGroup => ({
  ...line,
  childLineGroups: null,
  lineElementCounts: null,
  lineSpacing: null,
  elements: line.elements.map(cloneTextElement),
  originalElements: line.originalElements.map(cloneTextElement),
});

const groupLinesIntoParagraphs = (
  lineGroups: TextGroup[],
  pageWidth: number,
  metrics?: FontMetricsMap,
): TextGroup[] => {
  if (lineGroups.length === 0) {
    return [];
  }

  const paragraphs: TextGroup[][] = [];
  let currentParagraph: TextGroup[] = [lineGroups[0]];
  const bulletFlags = new Map<string, boolean>();
  bulletFlags.set(lineGroups[0].id, false);

  for (let i = 1; i < lineGroups.length; i++) {
    const prevLine = lineGroups[i - 1];
    const currentLine = lineGroups[i];

    // Calculate line spacing
    const prevBaseline = prevLine.baseline ?? 0;
    const currentBaseline = currentLine.baseline ?? 0;
    const lineSpacing = Math.abs(prevBaseline - currentBaseline);

    // Calculate average font size
    const prevFontSize = prevLine.fontSize ?? 12;
    const currentFontSize = currentLine.fontSize ?? 12;
    const avgFontSize = (prevFontSize + currentFontSize) / 2;

    // Check horizontal alignment (left edge)
    const prevLeft = prevLine.bounds.left;
    const currentLeft = currentLine.bounds.left;
    const leftAlignmentTolerance = avgFontSize * 0.3;
    const isLeftAligned = Math.abs(prevLeft - currentLeft) <= leftAlignmentTolerance;

    // Check if fonts match
    const sameFont = prevLine.fontId === currentLine.fontId;

    // Check for consistent spacing rather than expected spacing
    // Line spacing in PDFs can range from 1.0x to 3.0x font size
    // We just want to ensure spacing is consistent between consecutive lines
    // and not excessively large (which would indicate a paragraph break)
    const maxReasonableSpacing = avgFontSize * 3.0; // Max ~3x font size for normal line spacing
    const hasReasonableSpacing = lineSpacing <= maxReasonableSpacing;

    // Check if current line looks like a bullet/list item
    const prevRight = prevLine.bounds.right;
    const currentRight = currentLine.bounds.right;
    const prevWidth = prevRight - prevLeft;
    const currentWidth = currentRight - currentLeft;

    // Count word count to help identify bullets (typically short)
    const prevWords = (prevLine.text ?? '').split(/\s+/).filter(w => w.length > 0).length;
    const currentWords = (currentLine.text ?? '').split(/\s+/).filter(w => w.length > 0).length;
    const prevText = (prevLine.text ?? '').trim();
    const currentText = (currentLine.text ?? '').trim();

    // Bullet detection - look for bullet markers or very short lines
    const bulletMarkerRegex = /^[\u2022\u2023\u25E6\u2043\u2219•·◦‣⁃\-*]\s|^\d+[.)]\s|^[a-z][.)]\s/i;
    const prevHasBulletMarker = bulletMarkerRegex.test(prevText);
    const currentHasBulletMarker = bulletMarkerRegex.test(currentText);

    // True bullets are:
    // 1. Have bullet markers/numbers OR
    // 2. Very short (< 10 words) AND much narrower than average (< 60% of page width)
    const headingKeywords = ['action items', 'next steps', 'notes', 'logistics', 'tasks'];
    const normalizedPageWidth = pageWidth > 0 ? pageWidth : avgFontSize * 70;
    const maxReferenceWidth = normalizedPageWidth > 0 ? normalizedPageWidth : avgFontSize * 70;
    const indentDelta = currentLeft - prevLeft;
    const indentThreshold = Math.max(avgFontSize * 0.6, 8);
    const hasIndent = indentDelta > indentThreshold;
    const currentWidthRatio = maxReferenceWidth > 0 ? currentWidth / maxReferenceWidth : 0;
    const prevWidthRatio = maxReferenceWidth > 0 ? prevWidth / maxReferenceWidth : 0;
    const prevLooksLikeHeading =
      prevText.endsWith(':') ||
      (prevWords <= 4 && prevWidthRatio < 0.4) ||
      headingKeywords.some((keyword) => prevText.toLowerCase().includes(keyword));

    const wrapCandidate =
      !currentHasBulletMarker &&
      !hasIndent &&
      !prevLooksLikeHeading &&
      currentWords <= 12 &&
      currentWidthRatio < 0.45 &&
      Math.abs(prevLeft - currentLeft) <= leftAlignmentTolerance &&
      currentWidth < prevWidth * 0.85;

    const currentIsBullet = wrapCandidate
      ? false
      : currentHasBulletMarker ||
        (hasIndent && (currentWords <= 14 || currentWidthRatio <= 0.65)) ||
        (prevLooksLikeHeading && (currentWords <= 16 || currentWidthRatio <= 0.8 || prevWidthRatio < 0.35)) ||
        (currentWords <= 8 && currentWidthRatio <= 0.45 && prevWidth - currentWidth > avgFontSize * 4);

    const prevIsBullet = bulletFlags.get(prevLine.id) ?? prevHasBulletMarker;
    bulletFlags.set(currentLine.id, currentIsBullet);

    // Detect paragraph→bullet transition
    const likelyBulletStart = !prevIsBullet && currentIsBullet;

    // Don't merge two consecutive bullets
    const bothAreBullets = prevIsBullet && currentIsBullet;

    // Merge into paragraph if:
    // 1. Left aligned
    // 2. Same font
    // 3. Reasonable line spacing
    // 4. NOT transitioning to bullets
    // 5. NOT both are bullets
    const shouldMerge =
      isLeftAligned &&
      sameFont &&
      hasReasonableSpacing &&
      !likelyBulletStart &&
      !bothAreBullets &&
      !currentIsBullet;

    if (i < 10 || likelyBulletStart || bothAreBullets || !shouldMerge) {
      console.log(`  Line ${i}:`);
      console.log(`    prev: "${prevText.substring(0, 40)}" (${prevWords}w, ${prevWidth.toFixed(0)}pt, marker:${prevHasBulletMarker}, bullet:${prevIsBullet})`);
      console.log(`    curr: "${currentText.substring(0, 40)}" (${currentWords}w, ${currentWidth.toFixed(0)}pt, marker:${currentHasBulletMarker}, bullet:${currentIsBullet})`);
      console.log(`    checks: leftAlign:${isLeftAligned} (${Math.abs(prevLeft - currentLeft).toFixed(1)}pt), sameFont:${sameFont}, spacing:${hasReasonableSpacing} (${lineSpacing.toFixed(1)}pt/${maxReasonableSpacing.toFixed(1)}pt)`);
      console.log(`    decision: merge=${shouldMerge} (bulletStart:${likelyBulletStart}, bothBullets:${bothAreBullets})`);
    }

    if (shouldMerge) {
      currentParagraph.push(currentLine);
    } else {
      paragraphs.push(currentParagraph);
      currentParagraph = [currentLine];
    }
  }

  // Don't forget the last paragraph
  if (currentParagraph.length > 0) {
    paragraphs.push(currentParagraph);
  }

  // Merge line groups into single paragraph groups
  return paragraphs.map((lines, _paragraphIndex) => {
    if (lines.length === 1) {
      return lines[0];
    }

    // Combine all elements from all lines
    const lineTemplates = lines.map(line => cloneLineTemplate(line));
    const flattenedLineTemplates = lineTemplates.flatMap((line) =>
      line.childLineGroups && line.childLineGroups.length > 0
        ? line.childLineGroups
        : [line],
    );
    const allLines = flattenedLineTemplates.length > 0 ? flattenedLineTemplates : lineTemplates;
    const allElements = allLines.flatMap(line => line.originalElements);
    const pageIndex = lines[0].pageIndex;
    const lineElementCounts = allLines.map((line) => line.originalElements.length);

    // Create merged group with newlines between lines
    const paragraphText = allLines.map(line => line.text).join('\n');
    const mergedBounds = mergeBounds(allLines.map(line => line.bounds));
    const spacingValues: number[] = [];
    for (let i = 1; i < allLines.length; i++) {
      const prevBaseline = allLines[i - 1].baseline ?? allLines[i - 1].bounds.bottom;
      const currentBaseline = allLines[i].baseline ?? allLines[i].bounds.bottom;
      const spacing = Math.abs(prevBaseline - currentBaseline);
      if (spacing > 0) {
        spacingValues.push(spacing);
      }
    }
    const averageSpacing =
      spacingValues.length > 0
        ? spacingValues.reduce((sum, value) => sum + value, 0) / spacingValues.length
        : null;

    const firstElement = allElements[0];
    const rotation = computeGroupRotation(allElements);
    const anchor = rotation !== null ? getAnchorPoint(firstElement) : null;
    const baselineLength = computeBaselineLength(allElements, metrics);
    const baseline = computeAverageBaseline(allElements);

    return {
      id: lines[0].id, // Keep the first line's ID
      pageIndex,
      fontId: firstElement?.fontId,
      fontSize: firstElement?.fontSize,
      fontMatrixSize: firstElement?.fontMatrixSize,
      lineSpacing: averageSpacing,
      lineElementCounts: lines.length > 1 ? lineElementCounts : null,
      color: firstElement ? extractColor(firstElement) : null,
      fontWeight: null,
      rotation,
      anchor,
      baselineLength,
      baseline,
      elements: allElements.map(cloneTextElement),
      originalElements: allElements.map(cloneTextElement),
      text: paragraphText,
      originalText: paragraphText,
      bounds: mergedBounds,
      childLineGroups: allLines,
    };
  });
};

export const groupPageTextElements = (
  page: PdfJsonPage | null | undefined,
  pageIndex: number,
  metrics?: FontMetricsMap,
  groupingMode: 'auto' | 'paragraph' | 'singleLine' = 'auto',
): TextGroup[] => {
  if (!page?.textElements || page.textElements.length === 0) {
    return [];
  }

  const pageWidth = valueOr(page.width, DEFAULT_PAGE_WIDTH);

  const elements = page.textElements
    .map(cloneTextElement)
    .filter((element) => element.text !== null && element.text !== undefined);

  elements.sort((a, b) => getBaseline(b) - getBaseline(a));

  const lines: { baseline: number; elements: PdfJsonTextElement[] }[] = [];

  elements.forEach((element) => {
    const baseline = getBaseline(element);
    const fontSize = getFontSize(element);
    const tolerance = Math.max(LINE_TOLERANCE, fontSize * 0.12);

    const existingLine = lines.find((line) => Math.abs(line.baseline - baseline) <= tolerance);

    if (existingLine) {
      existingLine.elements.push(element);
    } else {
      lines.push({ baseline, elements: [element] });
    }
  });

  lines.forEach((line) => {
    line.elements.sort((a, b) => getX(a) - getX(b));
  });

  let groupCounter = 0;
  const lineGroups: TextGroup[] = [];

  lines.forEach((line) => {
    let currentBucket: PdfJsonTextElement[] = [];

    line.elements.forEach((element) => {
      if (currentBucket.length === 0) {
        currentBucket.push(element);
        return;
      }

      const previous = currentBucket[currentBucket.length - 1];
      const gap = getX(element) - (getX(previous) + getWidth(previous, metrics));
      const avgFontSize = (getFontSize(previous) + getFontSize(element)) / 2;
      const splitThreshold = Math.max(SPACE_MIN_GAP, avgFontSize * GAP_FACTOR);

      const sameFont = previous.fontId === element.fontId;
      let shouldSplit = gap > splitThreshold * (sameFont ? 1.4 : 1.0);

      if (shouldSplit) {
        const prevBaseline = getBaseline(previous);
        const currentBaseline = getBaseline(element);
        const baselineDelta = Math.abs(prevBaseline - currentBaseline);
        const prevEndX = getX(previous) + getWidth(previous, metrics);
        const _prevEndY = prevBaseline;
        const diagonalGap = Math.hypot(Math.max(0, getX(element) - prevEndX), baselineDelta);
        const diagonalThreshold = Math.max(avgFontSize * 0.8, splitThreshold);
        if (diagonalGap <= diagonalThreshold) {
          shouldSplit = false;
        }
      }

      const previousRotation = extractElementRotation(previous);
      const currentRotation = extractElementRotation(element);
      if (
        shouldSplit &&
        previousRotation !== null &&
        currentRotation !== null &&
        Math.abs(normalizeAngle(previousRotation - currentRotation)) < 1
      ) {
        shouldSplit = false;
      }

      if (shouldSplit) {
        lineGroups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
        groupCounter += 1;
        currentBucket = [element];
      } else {
        currentBucket.push(element);
      }
    });

    if (currentBucket.length > 0) {
      lineGroups.push(createGroup(pageIndex, groupCounter, currentBucket, metrics));
      groupCounter += 1;
    }
  });

  // Apply paragraph grouping based on mode
  if (groupingMode === 'singleLine') {
    // Single line mode: skip paragraph grouping
    return lineGroups;
  }

  if (groupingMode === 'paragraph') {
    // Paragraph mode: always apply grouping
    return groupLinesIntoParagraphs(lineGroups, pageWidth, metrics);
  }

  // Auto mode: use heuristic to determine if we should group
  // Analyze the page content to decide
  let multiLineGroups = 0;
  let totalWords = 0;
  let longTextGroups = 0;
  let totalGroups = 0;
  const wordCounts: number[] = [];
  let fullWidthLines = 0;

  // Define "full width" as extending to at least 70% of page width
  const fullWidthThreshold = pageWidth * 0.7;

  lineGroups.forEach((group) => {
    const text = (group.text || '').trim();
    if (text.length === 0) return;

    totalGroups++;
    const lines = text.split('\n');
    const lineCount = lines.length;
    const wordCount = text.split(/\s+/).filter((w) => w.length > 0).length;

    totalWords += wordCount;
    wordCounts.push(wordCount);

    if (lineCount > 1) {
      multiLineGroups++;
    }

    if (wordCount >= 10 || text.length >= 50) {
      longTextGroups++;
    }

    // Check if this line extends close to the right margin (paragraph-like)
    const rightEdge = group.bounds.right;
    if (rightEdge >= fullWidthThreshold) {
      fullWidthLines++;
    }
  });

  if (totalGroups === 0) {
    return lineGroups;
  }

  const avgWordsPerGroup = totalWords / totalGroups;
  const longTextRatio = longTextGroups / totalGroups;
  const fullWidthRatio = fullWidthLines / totalGroups;

  // Calculate variance in line lengths (paragraphs have varying lengths, lists are uniform)
  const variance = wordCounts.reduce((sum, count) => {
    const diff = count - avgWordsPerGroup;
    return sum + diff * diff;
  }, 0) / totalGroups;
  const stdDev = Math.sqrt(variance);
  const coefficientOfVariation = avgWordsPerGroup > 0 ? stdDev / avgWordsPerGroup : 0;

  // Check each criterion
  const criterion1 = avgWordsPerGroup > 5;
  const criterion2 = longTextRatio > 0.4;
  const criterion3 = coefficientOfVariation > 0.5 || fullWidthRatio > 0.6; // High variance OR many full-width lines = paragraph text

  const isParagraphPage = criterion1 && criterion2 && criterion3;

  // Log detection stats
  console.log(`📄 Page ${pageIndex} Grouping Analysis (mode: ${groupingMode}):`);
  console.log(`   Stats:`);
  console.log(`     • Page width: ${pageWidth.toFixed(1)}pt (full-width threshold: ${fullWidthThreshold.toFixed(1)}pt)`);
  console.log(`     • Multi-line groups: ${multiLineGroups}`);
  console.log(`     • Total groups: ${totalGroups}`);
  console.log(`     • Total words: ${totalWords}`);
  console.log(`     • Long text groups (≥10 words or ≥50 chars): ${longTextGroups}`);
  console.log(`     • Full-width lines (≥70% page width): ${fullWidthLines}`);
  console.log(`     • Avg words per group: ${avgWordsPerGroup.toFixed(2)}`);
  console.log(`     • Long text ratio: ${(longTextRatio * 100).toFixed(1)}%`);
  console.log(`     • Full-width ratio: ${(fullWidthRatio * 100).toFixed(1)}%`);
  console.log(`     • Std deviation: ${stdDev.toFixed(2)}`);
  console.log(`     • Coefficient of variation: ${coefficientOfVariation.toFixed(2)}`);
  console.log(`   Criteria:`);
  console.log(`     1. Avg Words Per Group: ${criterion1 ? '✅ PASS' : '❌ FAIL'}`);
  console.log(`        (${avgWordsPerGroup.toFixed(2)} > 5)`);
  console.log(`     2. Long Text Ratio: ${criterion2 ? '✅ PASS' : '❌ FAIL'}`);
  console.log(`        (${(longTextRatio * 100).toFixed(1)}% > 40%)`);
  console.log(`     3. Line Width Pattern: ${criterion3 ? '✅ PASS' : '❌ FAIL'}`);
  console.log(`        (CV ${coefficientOfVariation.toFixed(2)} > 0.5 OR ${(fullWidthRatio * 100).toFixed(1)}% > 60%)`);
  console.log(`        ${coefficientOfVariation > 0.5 ? '✓ High variance (varying line lengths)' : '✗ Low variance'} ${fullWidthRatio > 0.6 ? '✓ Many full-width lines (paragraph-like)' : '✗ Few full-width lines (list-like)'}`);
  console.log(`   Decision: ${isParagraphPage ? '📝 PARAGRAPH MODE' : '📋 LINE MODE'}`);
  if (isParagraphPage) {
    console.log(`   Reason: All three criteria passed (AND logic)`);
  } else {
    const failedReasons = [];
    if (!criterion1) failedReasons.push('low average words per group');
    if (!criterion2) failedReasons.push('low ratio of long text groups');
    if (!criterion3) failedReasons.push('low variance and few full-width lines (list-like structure)');
    console.log(`   Reason: ${failedReasons.join(', ')}`);
  }
  console.log('');

  // Only apply paragraph grouping if it looks like a paragraph-heavy page
  if (isParagraphPage) {
    console.log(`🔀 Applying paragraph grouping to page ${pageIndex}`);
    return groupLinesIntoParagraphs(lineGroups, pageWidth, metrics);
  }

  // For sparse pages, keep lines separate
  console.log(`📋 Keeping lines separate for page ${pageIndex}`);
  return lineGroups;
};

export const groupDocumentText = (
  document: PdfJsonDocument | null | undefined,
  groupingMode: 'auto' | 'paragraph' | 'singleLine' = 'auto',
): TextGroup[][] => {
  const pages = document?.pages ?? [];
  const metrics = buildFontMetrics(document);
  return pages.map((page, index) => groupPageTextElements(page, index, metrics, groupingMode));
};

export const extractPageImages = (
  page: PdfJsonPage | null | undefined,
  pageIndex: number,
): PdfJsonImageElement[] => {
  const images = page?.imageElements ?? [];
  return images.map((image, imageIndex) => {
    const clone = cloneImageElement(image);
    if (!clone.id || clone.id.trim().length === 0) {
      clone.id = `page-${pageIndex}-image-${imageIndex}`;
    }
    return clone;
  });
};

export const extractDocumentImages = (
  document: PdfJsonDocument | null | undefined,
): PdfJsonImageElement[][] => {
  const pages = document?.pages ?? [];
  return pages.map((page, index) => extractPageImages(page, index));
};

export const deepCloneDocument = (document: PdfJsonDocument): PdfJsonDocument => {
  if (typeof structuredClone === 'function') {
    return structuredClone(document);
  }
  return JSON.parse(JSON.stringify(document));
};

export const pageDimensions = (page: PdfJsonPage | null | undefined): { width: number; height: number } => {
  const width = valueOr(page?.width, DEFAULT_PAGE_WIDTH);
  const height = valueOr(page?.height, DEFAULT_PAGE_HEIGHT);

  console.log(`📏 [pageDimensions] Calculating page size:`, {
    hasPage: !!page,
    rawWidth: page?.width,
    rawHeight: page?.height,
    mediaBox: page?.mediaBox,
    cropBox: page?.cropBox,
    rotation: page?.rotation,
    calculatedWidth: width,
    calculatedHeight: height,
    DEFAULT_PAGE_WIDTH,
    DEFAULT_PAGE_HEIGHT,
    commonFormats: {
      'US Letter': '612 × 792 pt',
      'A4': '595 × 842 pt',
      'Legal': '612 × 1008 pt',
    },
  });

  return { width, height };
};

export const createMergedElement = (group: TextGroup): PdfJsonTextElement => {
  const reference = group.originalElements[0];
  const merged = cloneTextElement(reference);
  merged.text = sanitizeParagraphText(group.text);
  clearGlyphHints(merged);
  if (reference.textMatrix && reference.textMatrix.length === 6) {
    merged.textMatrix = [...reference.textMatrix];
  }
  return merged;
};

const distributeTextAcrossElements = (text: string | undefined, elements: PdfJsonTextElement[]): boolean => {
  if (elements.length === 0) {
    return true;
  }

  const normalizedText = sanitizeParagraphText(text);
  const targetChars = Array.from(normalizedText);
  if (targetChars.length === 0) {
    elements.forEach((element) => {
      element.text = '';
      clearGlyphHints(element);
    });
    return true;
  }

  const capacities = elements.map((element) => {
    const originalText = element.text ?? '';
    const graphemeCount = Array.from(originalText).length;
    return graphemeCount > 0 ? graphemeCount : 1;
  });

  let cursor = 0;
  elements.forEach((element, index) => {
    const remaining = targetChars.length - cursor;
    let sliceLength = 0;
    if (remaining > 0) {
      if (index === elements.length - 1) {
        sliceLength = remaining;
      } else {
        const capacity = Math.max(capacities[index], 1);
        const minRemainingForRest = Math.max(elements.length - index - 1, 0);
        sliceLength = Math.min(capacity, Math.max(remaining - minRemainingForRest, 1));
      }
    }

    element.text = sliceLength > 0 ? targetChars.slice(cursor, cursor + sliceLength).join('') : '';
    clearGlyphHints(element);
    cursor += sliceLength;
  });

  elements.forEach((element) => {
    if (element.text == null) {
      element.text = '';
    }
  });

  return true;
};

const sliceElementsByLineCounts = (group: TextGroup): PdfJsonTextElement[][] => {
  const counts = group.lineElementCounts;
  if (!counts || counts.length === 0) {
    if (!group.originalElements.length) {
      return [];
    }
    return [group.originalElements];
  }

  const result: PdfJsonTextElement[][] = [];
  let cursor = 0;
  counts.forEach((count) => {
    if (count <= 0) {
      return;
    }
    const slice = group.originalElements.slice(cursor, cursor + count);
    if (slice.length > 0) {
      result.push(slice);
    }
    cursor += count;
  });
  return result;
};

const rebuildParagraphLineElements = (group: TextGroup): PdfJsonTextElement[] | null => {
  if (!group.text || !group.text.includes('\n')) {
    return null;
  }

  const lineTexts = splitParagraphIntoLines(group.text);
  if (lineTexts.length === 0) {
    return [];
  }

  const lineElementGroups = sliceElementsByLineCounts(group);
  if (!lineElementGroups.length) {
    return null;
  }

  const lineBaselines = lineElementGroups.map((elements) => {
    for (const element of elements) {
      const baseline = extractElementBaseline(element);
      if (baseline !== null) {
        return baseline;
      }
    }
    return group.baseline ?? null;
  });

  const spacingFromBaselines = (() => {
    for (let i = 1; i < lineBaselines.length; i += 1) {
      const prev = lineBaselines[i - 1];
      const current = lineBaselines[i];
      if (prev !== null && current !== null) {
        const diff = Math.abs(prev - current);
        if (diff > 0) {
          return diff;
        }
      }
    }
    return null;
  })();

  const spacing =
    (group.lineSpacing && group.lineSpacing > 0
      ? group.lineSpacing
      : spacingFromBaselines) ??
    Math.max(group.fontMatrixSize ?? group.fontSize ?? 12, 6) * 1.2;

  let direction = -1;
  for (let i = 1; i < lineBaselines.length; i += 1) {
    const prev = lineBaselines[i - 1];
    const current = lineBaselines[i];
    if (prev !== null && current !== null && Math.abs(prev - current) > 0.05) {
      direction = current < prev ? -1 : 1;
      break;
    }
  }

  const templateCount = lineElementGroups.length;
  const lastTemplateIndex = Math.max(templateCount - 1, 0);
  const rebuilt: PdfJsonTextElement[] = [];

  for (let index = 0; index < lineTexts.length; index += 1) {
    const templateIndex = Math.min(index, lastTemplateIndex);
    const templateElements = lineElementGroups[templateIndex];
    if (!templateElements || templateElements.length === 0) {
      return null;
    }

    const shiftSteps = index - templateIndex;
    const delta = shiftSteps * spacing * direction;
    const clones = shiftElementsBy(templateElements, delta);
    const normalizedLine = sanitizeParagraphText(lineTexts[index]);
    const distributed = distributeTextAcrossElements(normalizedLine, clones);

    if (!distributed) {
      const primary = clones[0];
      primary.text = normalizedLine;
      clearGlyphHints(primary);
      for (let i = 1; i < clones.length; i += 1) {
        clones[i].text = '';
        clearGlyphHints(clones[i]);
      }
    }

    rebuilt.push(...clones);
  }

  return rebuilt;
};

export const buildUpdatedDocument = (
  source: PdfJsonDocument,
  groupsByPage: TextGroup[][],
  imagesByPage: PdfJsonImageElement[][],
): PdfJsonDocument => {
  const updated = deepCloneDocument(source);
  const pages = updated.pages ?? [];

  updated.pages = pages.map((page, pageIndex) => {
    const groups = groupsByPage[pageIndex] ?? [];
    const images = imagesByPage[pageIndex] ?? [];
    if (!groups.length) {
      return {
        ...page,
        imageElements: images.map(cloneImageElement),
      };
    }

    const updatedElements: PdfJsonTextElement[] = groups.flatMap((group) => {
      if (group.text === group.originalText) {
        return group.originalElements.map(cloneTextElement);
      }
      return [createMergedElement(group)];
    });

    return {
      ...page,
      textElements: updatedElements,
      imageElements: images.map(cloneImageElement),
      contentStreams: page.contentStreams ?? [],
    };
  });

  return updated;
};

export const restoreGlyphElements = (
  source: PdfJsonDocument,
  groupsByPage: TextGroup[][],
  imagesByPage: PdfJsonImageElement[][],
  originalImagesByPage: PdfJsonImageElement[][],
  forceMergedGroups: boolean = false,
): PdfJsonDocument => {
  const updated = deepCloneDocument(source);
  const pages = updated.pages ?? [];

  updated.pages = pages.map((page, pageIndex) => {
    const groups = groupsByPage[pageIndex] ?? [];
    const images = imagesByPage[pageIndex] ?? [];
    const _baselineImages = originalImagesByPage[pageIndex] ?? [];

    if (!groups.length) {
      return {
        ...page,
        imageElements: images.map(cloneImageElement),
      };
    }

    const rebuiltElements: PdfJsonTextElement[] = [];

    groups.forEach((group) => {
      if (group.text !== group.originalText) {
        // Always try to rebuild paragraph lines if text has newlines
        const paragraphElements = rebuildParagraphLineElements(group);
        if (paragraphElements && paragraphElements.length > 0) {
          rebuiltElements.push(...paragraphElements);
          return;
        }
        // If no newlines or rebuilding failed, check if we should force merge
        if (forceMergedGroups) {
          rebuiltElements.push(createMergedElement(group));
          return;
        }
        const originalGlyphCount = group.originalElements.reduce(
          (sum, element) => sum + countGraphemes(element.text ?? ''),
          0,
        );
        const normalizedText = sanitizeParagraphText(group.text);
        const targetGlyphCount = countGraphemes(normalizedText);

        if (targetGlyphCount !== originalGlyphCount) {
          rebuiltElements.push(createMergedElement(group));
          return;
        }

        const originals = group.originalElements.map(cloneTextElement);
        const distributed = distributeTextAcrossElements(normalizedText, originals);
        if (distributed) {
          rebuiltElements.push(...originals);
        } else {
          rebuiltElements.push(createMergedElement(group));
        }
        return;
      }

      rebuiltElements.push(...group.originalElements.map(cloneTextElement));
    });

    return {
      ...page,
      textElements: rebuiltElements,
      imageElements: images.map(cloneImageElement),
      contentStreams: page.contentStreams ?? [],
    };
  });

  return updated;
};

const approxEqual = (a: number | null | undefined, b: number | null | undefined, tolerance = 0.25): boolean => {
  const first = typeof a === 'number' && Number.isFinite(a) ? a : 0;
  const second = typeof b === 'number' && Number.isFinite(b) ? b : 0;
  return Math.abs(first - second) <= tolerance;
};

const arrayApproxEqual = (
  first: number[] | null | undefined,
  second: number[] | null | undefined,
  tolerance = 0.25,
): boolean => {
  if (!first && !second) {
    return true;
  }
  if (!first || !second) {
    return false;
  }
  if (first.length !== second.length) {
    return false;
  }
  for (let index = 0; index < first.length; index += 1) {
    if (!approxEqual(first[index], second[index], tolerance)) {
      return false;
    }
  }
  return true;
};

const areImageElementsEqual = (
  current: PdfJsonImageElement,
  original: PdfJsonImageElement,
): boolean => {
  if (current === original) {
    return true;
  }
  if (!current || !original) {
    return false;
  }

  const sameData = (current.imageData ?? null) === (original.imageData ?? null);
  const sameFormat = (current.imageFormat ?? null) === (original.imageFormat ?? null);

  return (
    sameData &&
    sameFormat &&
    approxEqual(current.x, original.x) &&
    approxEqual(current.y, original.y) &&
    approxEqual(current.width, original.width) &&
    approxEqual(current.height, original.height) &&
    approxEqual(current.left, original.left) &&
    approxEqual(current.right, original.right) &&
    approxEqual(current.top, original.top) &&
    approxEqual(current.bottom, original.bottom) &&
    (current.zOrder ?? null) === (original.zOrder ?? null) &&
    arrayApproxEqual(current.transform, original.transform)
  );
};

export const areImageListsDifferent = (
  current: PdfJsonImageElement[],
  original: PdfJsonImageElement[],
): boolean => {
  if (current.length !== original.length) {
    return true;
  }
  for (let index = 0; index < current.length; index += 1) {
    if (!areImageElementsEqual(current[index], original[index])) {
      return true;
    }
  }
  return false;
};

export const getDirtyPages = (
  groupsByPage: TextGroup[][],
  imagesByPage: PdfJsonImageElement[][],
  originalGroupsByPage: TextGroup[][],
  originalImagesByPage: PdfJsonImageElement[][],
): boolean[] => {
  return groupsByPage.map((groups, index) => {
    // Check if any text was modified
    const textDirty = groups.some((group) => group.text !== group.originalText);

    // Check if any groups were deleted by comparing with original groups
    const originalGroups = originalGroupsByPage[index] ?? [];
    const groupCountChanged = groups.length !== originalGroups.length;

    const imageDirty = areImageListsDifferent(
      imagesByPage[index] ?? [],
      originalImagesByPage[index] ?? [],
    );

    const isDirty = textDirty || groupCountChanged || imageDirty;

    if (groupCountChanged || textDirty) {
      console.log(`📄 Page ${index} dirty check:`, {
        textDirty,
        groupCountChanged,
        originalGroupsLength: originalGroups.length,
        currentGroupsLength: groups.length,
        imageDirty,
        isDirty,
      });
    }

    return isDirty;
  });
};
